-
Notifications
You must be signed in to change notification settings - Fork 11.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BOLT] Add split function support for the Linux kernel #90541
Conversation
@llvm/pr-subscribers-bolt Author: Maksim Panchenko (maksfb) Changes: While rewriting the Linux kernel, we try to fit optimized functions into their original boundaries. When a function becomes larger, we skip it during the rewrite and end up with a less-than-optimal code layout. To overcome that issue, add support for the --split-functions option so that the hot part of the function can fit into the original space. The cold part goes to the reserved space in the binary. Full diff: https://github.com/llvm/llvm-project/pull/90541.diff 4 Files Affected:
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 8b1af9e8153925..75765819ac464e 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -20,6 +20,7 @@
#include "bolt/Core/JumpTable.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/RuntimeLibs/RuntimeLibrary.h"
+#include "llvm/ADT/AddressRanges.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/iterator.h"
@@ -726,6 +727,9 @@ class BinaryContext {
uint64_t OldTextSectionOffset{0};
uint64_t OldTextSectionSize{0};
+ /// Area in the input binary reserved for BOLT.
+ AddressRange BOLTReserved;
+
/// Address of the code/function that is executed before any other code in
/// the binary.
std::optional<uint64_t> StartFunctionAddress;
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index f9e634d15a9724..bd0b6dea0e065a 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -715,6 +715,12 @@ Error SplitFunctions::runOnFunctions(BinaryContext &BC) {
if (!opts::SplitFunctions)
return Error::success();
+ if (BC.IsLinuxKernel && BC.BOLTReserved.empty()) {
+ BC.errs() << "BOLT-ERROR: split functions require reserved space in the "
+ "Linux kernel binary\n";
+ exit(1);
+ }
+
// If split strategy is not CDSplit, then a second run of the pass is not
// needed after function reordering.
if (BC.HasFinalizedFunctionOrder &&
@@ -829,6 +835,13 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
}
}
}
+
+ // Outlining blocks with dynamic branches is not supported yet.
+ if (BC.IsLinuxKernel) {
+ if (llvm::any_of(
+ *BB, [&](MCInst &Inst) { return BC.MIB->isDynamicBranch(Inst); }))
+ BB->setCanOutline(false);
+ }
}
BF.getLayout().updateLayoutIndices();
diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index d96199e020d31a..1fe984748c0231 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -248,6 +248,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
/// Update ORC data in the binary.
Error rewriteORCTables();
+ /// Validate written ORC tables after binary emission.
+ Error validateORCTables();
+
/// Static call table handling.
Error readStaticCalls();
Error rewriteStaticCalls();
@@ -358,6 +361,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
if (Error E = updateStaticKeysJumpTablePostEmit())
return E;
+ if (Error E = validateORCTables())
+ return E;
+
return Error::success();
}
};
@@ -777,11 +783,9 @@ Error LinuxKernelRewriter::rewriteORCTables() {
};
// Emit new ORC entries for the emitted function.
- auto emitORC = [&](const BinaryFunction &BF) -> Error {
- assert(!BF.isSplit() && "Split functions not supported by ORC writer yet.");
-
+ auto emitORC = [&](const FunctionFragment &FF) -> Error {
ORCState CurrentState = NullORC;
- for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
+ for (BinaryBasicBlock *BB : FF) {
for (MCInst &Inst : *BB) {
ErrorOr<ORCState> ErrorOrState =
BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
@@ -802,7 +806,36 @@ Error LinuxKernelRewriter::rewriteORCTables() {
return Error::success();
};
+ // Emit ORC entries for cold fragments. We assume that these fragments are
+ // emitted contiguously in memory using reserved space in the kernel. This
+ // assumption is validated in post-emit pass validateORCTables() where we
+ // check that ORC entries are sorted by their addresses.
+ auto emitColdORC = [&]() -> Error {
+ for (BinaryFunction &BF :
+ llvm::make_second_range(BC.getBinaryFunctions())) {
+ if (!BC.shouldEmit(BF))
+ continue;
+ for (FunctionFragment &FF : BF.getLayout().getSplitFragments())
+ if (Error E = emitORC(FF))
+ return E;
+ }
+
+ return Error::success();
+ };
+
+ bool ShouldEmitCold = !BC.BOLTReserved.empty();
for (ORCListEntry &Entry : ORCEntries) {
+ if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) {
+ if (Error E = emitColdORC())
+ return E;
+
+ // Emit terminator entry at the end of the reserved region.
+ if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC))
+ return E;
+
+ ShouldEmitCold = false;
+ }
+
// Emit original entries for functions that we haven't modified.
if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
// Emit terminator only if it marks the start of a function.
@@ -816,7 +849,7 @@ Error LinuxKernelRewriter::rewriteORCTables() {
// Emit all ORC entries for a function referenced by an entry and skip over
// the rest of entries for this function by resetting its ORC attribute.
if (Entry.BF->hasORC()) {
- if (Error E = emitORC(*Entry.BF))
+ if (Error E = emitORC(Entry.BF->getLayout().getMainFragment()))
return E;
Entry.BF->setHasORC(false);
}
@@ -825,10 +858,9 @@ Error LinuxKernelRewriter::rewriteORCTables() {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
<< " ORC entries\n");
- // Replicate terminator entry at the end of sections to match the original
- // table sizes.
- const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second;
- const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize();
+ // Populate ORC tables with a terminator entry with max address to match the
+ // original table sizes.
+ const uint64_t LastIP = std::numeric_limits<uint64_t>::max();
while (UnwindWriter.bytesRemaining()) {
if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
return E;
@@ -837,6 +869,31 @@ Error LinuxKernelRewriter::rewriteORCTables() {
return Error::success();
}
+/// Validate the ORC IP table written to the output binary.
+///
+/// Reads NumORCEntries 32-bit entries from the output contents of
+/// ORCUnwindIPSection. Each entry is interpreted as a signed offset relative
+/// to the address of the entry itself. In assertion-enabled builds, checks
+/// that the resulting IPs are in non-decreasing order.
+///
+/// \returns success if there is no ORC IP section or all entries were read;
+/// an executable_format_error if reading runs past the section contents.
+Error LinuxKernelRewriter::validateORCTables() {
+  if (!ORCUnwindIPSection)
+    return Error::success();
+
+  const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
+  DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getOutputContents(),
+                                     BC.AsmInfo->isLittleEndian(),
+                                     BC.AsmInfo->getCodePointerSize());
+  DataExtractor::Cursor IPCursor(0);
+  uint64_t PrevIP = 0;
+  for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
+    // Record the entry offset *before* reading: getU32() advances the cursor,
+    // and the evaluation order of operands of '+' is unspecified, so calling
+    // tell() and getU32() in one expression could yield an IP that is off by
+    // the entry size.
+    const uint64_t EntryOffset = IPCursor.tell();
+    const int32_t RelativeIP = static_cast<int32_t>(IPDE.getU32(IPCursor));
+    if (!IPCursor)
+      return createStringError(errc::executable_format_error,
+                               "out of bounds while reading ORC IP table: %s",
+                               toString(IPCursor.takeError()).c_str());
+
+    // The signed offset is sign-extended when added to the 64-bit address.
+    const uint64_t IP = IPSectionAddress + EntryOffset + RelativeIP;
+
+    assert(IP >= PrevIP && "Unsorted ORC table detected");
+    // PrevIP is only read by the assertion above; silence the
+    // set-but-unused warning in builds where assertions are disabled.
+    (void)PrevIP;
+    PrevIP = IP;
+  }
+
+  return Error::success();
+}
+
/// The static call site table is created by objtool and contains entries in the
/// following format:
///
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 644d87eeca42e6..9b555f2e1a80cf 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3628,13 +3628,19 @@ void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
}
if (StartBD) {
+ if (StartBD->getAddress() >= EndBD->getAddress()) {
+ BC->errs() << "BOLT-ERROR: invalid reserved space boundaries\n";
+ exit(1);
+ }
+ BC->BOLTReserved = AddressRange(StartBD->getAddress(), EndBD->getAddress());
+ BC->outs()
+ << "BOLT-INFO: using reserved space for allocating new sections\n";
+
PHDRTableOffset = 0;
PHDRTableAddress = 0;
NewTextSegmentAddress = 0;
NewTextSegmentOffset = 0;
- NextAvailableAddress = StartBD->getAddress();
- BC->outs()
- << "BOLT-INFO: using reserved space for allocating new sections\n";
+ NextAvailableAddress = BC->BOLTReserved.start();
}
// If no new .eh_frame was written, remove relocated original .eh_frame.
@@ -3657,12 +3663,12 @@ void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
// Map the rest of the sections.
mapAllocatableSections(MapSection);
- if (StartBD) {
- const uint64_t ReservedSpace = EndBD->getAddress() - StartBD->getAddress();
- const uint64_t AllocatedSize = NextAvailableAddress - StartBD->getAddress();
- if (ReservedSpace < AllocatedSize) {
- BC->errs() << "BOLT-ERROR: reserved space (" << ReservedSpace << " byte"
- << (ReservedSpace == 1 ? "" : "s")
+ if (!BC->BOLTReserved.empty()) {
+ const uint64_t AllocatedSize =
+ NextAvailableAddress - BC->BOLTReserved.start();
+ if (BC->BOLTReserved.size() < AllocatedSize) {
+ BC->errs() << "BOLT-ERROR: reserved space (" << BC->BOLTReserved.size()
+ << " byte" << (BC->BOLTReserved.size() == 1 ? "" : "s")
<< ") is smaller than required for new allocations ("
<< AllocatedSize << " bytes)\n";
exit(1);
@@ -5853,13 +5859,11 @@ void RewriteInstance::writeEHFrameHeader() {
NextAvailableAddress += EHFrameHdrSec.getOutputSize();
- if (const BinaryData *ReservedEnd =
- BC->getBinaryDataByName(getBOLTReservedEnd())) {
- if (NextAvailableAddress > ReservedEnd->getAddress()) {
- BC->errs() << "BOLT-ERROR: unable to fit " << getEHFrameHdrSectionName()
- << " into reserved space\n";
- exit(1);
- }
+ if (!BC->BOLTReserved.empty() &&
+ (NextAvailableAddress > BC->BOLTReserved.end())) {
+ BC->errs() << "BOLT-ERROR: unable to fit " << getEHFrameHdrSectionName()
+ << " into reserved space\n";
+ exit(1);
}
// Merge new .eh_frame with the relocated original so that gdb can locate all
@@ -5893,7 +5897,7 @@ uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) {
uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const {
// Check if it's possibly part of the new segment.
- if (Address >= NewTextSegmentAddress)
+ if (NewTextSegmentAddress && Address >= NewTextSegmentAddress)
return Address - NewTextSegmentAddress + NewTextSegmentOffset;
// Find an existing segment that matches the address.
|
FYI: moved new code for |
Good to go, both this one and #90766 |
While rewriting the Linux kernel, we try to fit optimized functions into their original boundaries. When a function becomes larger, we skip it during the rewrite and end up with a less-than-optimal code layout. To overcome that issue, add support for the --split-functions option so that the hot part of the function can fit into the original space. The cold part goes to the reserved space in the binary.
While rewriting the Linux kernel, we try to fit optimized functions into their original boundaries. When a function becomes larger, we skip it during the rewrite and end up with a less-than-optimal code layout. To overcome that issue, add support for the --split-functions option so that the hot part of the function can fit into the original space. The cold part goes to the reserved space in the binary.